# Packages used throughout. Each call sits on its own line so that a trailing
# comment cannot swallow the library() calls that follow it.
library(MASS)     # Datasets
library(mice)     # Boys dataset
library(dplyr)    # Data manipulation
library(magrittr) # Pipes
library(ggplot2)  # Plotting suite
library(sf)       # Spatial features
Statistical Programming in R
# Package set-up (repeats the list at the top of the file). One call per line:
# on a single line everything after the first `#` would be a comment.
library(MASS)     # Datasets
library(mice)     # Boys dataset
library(dplyr)    # Data manipulation
library(magrittr) # Pipes
library(ggplot2)  # Plotting suite
library(sf)       # Spatial features
Source: Anscombe, F. J. (1973). “Graphs in Statistical Analysis”. American Statistician. 27 (1): 17–21.
Source: https://www.autodeskresearch.com/publications/samestats
# base graphics in R
# ggplot2 graphics
#
# base graphics in R
# Scatter plot of weight against height for the boys data.
plot(x = boys$hgt, y = boys$wgt, main = "Scatter plot",
     xlab = "Height", ylab = "Weight", bty = "L")
# Line chart of the phones data: calls against year, with the year values
# shifted by 1900 for the x axis.
with(
  phones,
  plot(
    x = 1900 + year,
    y = calls,
    type = "l",
    main = "Line chart",
    xlab = "Year",
    ylab = "Phone calls in Belgium, millions",
    bty = "L"
  )
)
# Bar chart of the number of boys per region.
counts <- table(boys$reg)
barplot(counts, main = "Bar chart", ylab = "N")
# Pie chart of the number of boys per region.
counts <- table(boys$reg)
pie(x = counts, main = "Pie chart")
# Histogram of the height column of the boys data.
with(boys, hist(hgt, main = "Histogram", xlab = "Height"))
# Kernel density estimate of height; na.rm = TRUE drops missing heights
# before the density is estimated.
dens <- density(boys$hgt, na.rm = TRUE)
plot(dens, main = "Density plot", xlab = "Height", bty = "L")
# Boxplot of height, one box per region.
boxplot(
  hgt ~ reg,
  data = boys,
  main = "Boxplot",
  xlab = "Region",
  ylab = "Height"
)
# Missing-data pattern of the boys data (md.pattern() comes from mice).
md.pattern(boys)
##     age reg wgt hgt bmi hc gen phb  tv
## 223   1   1   1   1   1  1   1   1   1    0
## 19    1   1   1   1   1  1   1   1   0    1
## 1     1   1   1   1   1  1   1   0   1    1
## 1     1   1   1   1   1  1   0   1   0    2
## 437   1   1   1   1   1  1   0   0   0    3
## 43    1   1   1   1   1  0   0   0   0    4
## 16    1   1   1   0   0  1   0   0   0    5
## 1     1   1   1   0   0  0   0   0   0    6
## 1     1   1   0   1   0  1   0   0   0    5
## 1     1   1   0   0   0  1   1   1   1    3
## 1     1   1   0   0   0  0   1   1   1    4
## 1     1   1   0   0   0  0   0   0   0    7
## 3     1   0   1   1   1  1   0   0   0    4
##       0   3   4  20  21 46 503 503 522 1622
# plot() method
# Fit a linear model of age on weight, then draw the first of the diagnostic
# plots that plot() offers for the fitted model (which = 1).
result <- lm(age ~ wgt, data = boys)
plot(result, which = 1)
ggplot2? Layered plotting based on the book The Grammar of Graphics by Leland Wilkinson.
With ggplot2 you provide the data, map variables to aesthetics, and state which geometric object to display;
ggplot2 then takes care of the details
1: Provide the data
# Step 1: an empty plot that only knows about the data.
ggplot(data = boys)
2: map variables to aesthetics
# Step 2: map age to the x axis and bmi to the y axis (no geometry yet).
ggplot(data = boys, mapping = aes(x = age, y = bmi))
3: state which geometric object to display
# Step 3: add a point layer so the mapped variables are actually drawn.
ggplot(data = boys, mapping = aes(x = age, y = bmi)) +
  geom_point()
Create the plot
# Base plot object: bmi against age, drawn as dark green points.
gg <- ggplot(data = boys, mapping = aes(x = age, y = bmi)) +
  geom_point(colour = "dark green")
Add another layer (smooth fit line)
# Add a dark blue smoothed fit line on top of the existing layers.
gg <- gg +
  geom_smooth(colour = "dark blue")
Give it some labels and a nice look
# Attach axis labels and a title, switch to the minimal theme, then draw.
gg <- gg +
  labs(
    x = "Age",
    y = "BMI",
    title = "BMI trend for boys"
  ) +
  theme_minimal()
plot(gg)
# Bubble plot of bmi against age: point size follows head circumference (hc)
# and colour follows region (reg). Rows with a missing region are dropped
# before plotting.
gg <- ggplot(
  data = filter(boys, !is.na(reg)),
  mapping = aes(x = age, y = bmi, size = hc, colour = reg)
) +
  geom_point(alpha = 0.5) +  # semi-transparent points
  labs(
    title = "BMI trend for boys",
    x = "Age",
    y = "BMI",
    size = "Head circumference",
    colour = "Region"
  ) +
  theme_minimal()
plot(gg)
geom_smooth
geom_density
sf package
data.frames
We have time for a cursory introduction at most.
# Read the DK_map shapefile (path relative to the working directory).
denmark <- sf::st_read(dsn = "DK_map.shp")
## Reading layer `DK_map' from data source `C:\Users\tgw513\Documents\GitHub\Rbosnia\Contents\Material\Part I - Data visualization\DK_map.shp' using driver `ESRI Shapefile'
## Simple feature collection with 306 features and 6 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 441524.8 ymin: 6049785 xmax: 892800.8 ymax: 6402308
## epsg (SRID):    NA
## proj4string:    +proj=utm +zone=32 +ellps=GRS80 +units=m +no_defs
# Outline map: plot only the geometry of the denmark object.
plot(st_geometry(denmark))

# Share of each feature's population counted in the over70 column.
denmark[["proportion.over.70"]] <-
  denmark[["over70"]] / denmark[["population"]]

# Map of the new column alone.
plot(
  denmark["proportion.over.70"],
  main = "Proportion of population aged 70 years and above"
)
# ggplot
# Same map with ggplot2: geom_sf() draws the features and the fill colour
# follows the proportion.over.70 column.
denmark %>%
  ggplot(aes(fill = proportion.over.70)) +
  geom_sf()